########### MANUSCRIPT: The Role of Education and Attitudes in Cooking Fuel Choice: Evidence from two states in India
########### JOURNAL: ENERGY FOR SUSTAINABLE DEVELOPMENT
########### AUTHORS: CARLOS F. GOULD/1 AND JOHANNES URPELAINEN/2
########### AFFILIATIONS: 1/COLUMBIA UNIVERSITY MAILMAN SCHOOL OF PUBLIC HEALTH AND 2/JOHNS HOPKINS SCHOOL OF ADVANCED INTERNATIONAL STUDIES
########### PURPOSE: THIS IS CODE #6 THAT RUNS REGRESSIONS FOR THE SUPPORTING INFORMATION




# Figure A8 ------------------

# Conditional logit models instead of fixed effects models

creg1_K <- clogit(Owns_LPG ~ 
                    Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                    Exp_log + NumberAdults + NumberChildrenUnder18 +
                    AgeRespondent + 
                    Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                    Religion_Muslim + Religion_Christian + Religion_Other + 
                    Rural + 
                    strata(District), data=kerala)

creg1_R <- clogit(Owns_LPG ~ 
                    Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                    Exp_log + NumberAdults + NumberChildrenUnder18 +
                    AgeRespondent + 
                    Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                    Religion_Muslim + Religion_Christian + Religion_Other + 
                    Rural + 
                    strata(District), data=rajasthan)


DistrictFE <- c("factor(District)2", "factor(District)3", "factor(District)4", "factor(District)5", 
                "factor(District)6", "factor(District)7", "factor(District)8", 
                "factor(District)10", "factor(District)11", "factor(District)12", "factor(District)13", 
                "factor(District)14", "factor(District)15", "factor(District)16", "factor(District)17",
                "factor(District)18", "factor(District)19", "factor(District)20", "factor(District)21",
                "factor(District)22", "factor(District)23", "factor(District)24", "factor(District)25",
                "factor(District)26", "factor(District)27", "factor(District)28")

creg12_df <- rbind(tidy(creg1_K) %>% mutate(model="Kerala"),
                   tidy(creg1_R) %>% mutate(model="Rajasthan")) %>%
  filter(!(term %in% DistrictFE)) %>%
  relabel_predictors(c(Education_CWE_primary="Primary School",
                       Education_CWE_middle_high="Middle or High School",
                       Education_CWE_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))


dwplot(creg12_df, dodge_size=0.5, dot_args = list(aes(shape=model), size=4), whisker_args = list(aes(linetype=model), size=1.5)) + 
  xlab("\nCoefficient Estimate") + ylab("") +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("grey50", "black"),
                     labels=c("Kerala (Conditional Logit)", 
                              "Rajasthan (Conditional Logit)")) +
  scale_shape_manual(values=c(16,15),
                     labels=c("Kerala (Conditional Logit)", 
                              "Rajasthan (Conditional Logit)")) +
  ggtitle("Association between CWE Education and LPG Ownership",
          subtitle = "Kerala (N=3929) and Rajasthan Households (N=6077)") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.80,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=18),
        axis.title.x = element_text(size=22))



# Figure A9 -------------------------

# Coecient plot for linear regressions showing the associations between perceptions and
# LPG ownership in individual regressions in Kerala households. Regressions control for covariates
# and district xed eects (not shown). Coecients are not exponentiated. Whiskers show 95% con-
#   dence intervals. Stars show statistical signicance after Benjamini-Hochberg method for controlling
# for false discovery rate: * P<0.10, ** P<0.05, *** P<0.01.



p_lpg_reg_table <- data.frame(matrix(NA, 1, 6))
colnames(p_lpg_reg_table) <- c("term", "estimate", "std.error", "statistic", "p.value", "perception")

for (i in 1:30) {
  p <- lm(Owns_LPG ~ unlist(kerala[,1175+i]) +
            Exp_log + NumberAdults + NumberChildrenUnder18 +
            AgeRespondent + 
            Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
            Religion_Muslim + Religion_Christian + Religion_Other + 
            Rural + factor(District), data = kerala)
  tidyp <- tidy(p)
  tidyp$perception <- perceptions_list[i]
  p_lpg_reg_table <- rbind(p_lpg_reg_table, tidyp[2,])
}

p_lpg_reg_table <- p_lpg_reg_table[-1,]


perceptions_list_ordered <- c("Chulha in the kitchen is needed, especially during holidays and with guests", 
                              "Must light chulha to start the data, for prosperity and wealth", 
                              "Chulha smoke purifies the home", 
                              "Cooking with firewood is tradition", 
                              "Food tastes better cooked on the chulha", 
                              "Food cooked on the chulha is preferred by my family", 
                              
                              "Lighting the chulha is the hardest part of cooking", 
                              "Cooking with the chulha takes a long time",
                              "Cleaning the chulha takes a long time",
                              "Smoke spoils the walls and utensils",
                              
                              "Firewood is easily available", 
                              "Firewood is used because it is free", 
                              "I have to continuously monitor cooking with firewood", 
                              "Firewood costs more during the rainy seasons", 
                              "When firewood is scarce, we have to travel further to get it", 
                              
                              "We inhale a lot of smoke, sometimes it is suffocating",
                              "Sometimes smoke irritates the eyes, gives headache, and backache", 
                              "Smoke affects our children's health", 
                              "With LPG we don't have smoke issues", 
                              
                              "I don't tell people if I'm sick",
                              "When the kids are sick, we take them to the doctor", 
                              "We trust the advice of doctors", 
                              
                              "Cooking with LPG is easy, it takes less time", 
                              "LPG Adoption will be automatic among the next generation", 
                              "LPG or induction is unnafordable entirely",
                              "LPG cylinders are costly", 
                              "If I have the money, LPG cooking is better",
                              
                              "Cooking is all about making tasty food so my family has satisfaction", 
                              "Cooking is about experimenting with new dishes", 
                              "I follow all traditional practices strictly")


p_lpg_reg_table$p.value.bh <- p.adjust(p_lpg_reg_table$p.value, method = "BH", n = length(p_lpg_reg_table$p.value))
p_lpg_reg_table$p.value.bh.star <- ifelse(p_lpg_reg_table$p.value.bh<0.01, "***",
                                        ifelse(p_lpg_reg_table$p.value.bh<0.05, "**",
                                               ifelse(p_lpg_reg_table$p.value.bh<0.10, "*","")))


ggplot(p_lpg_reg_table, aes(x=factor(perception), y=estimate,
                          ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, label=p.value.bh.star)) + 
  geom_point() +
  geom_text(aes(label=p.value.bh.star, y=(estimate - 2.3*std.error), x=perception), vjust=.7, position = position_dodge(width = .5), size = 6) +
  geom_pointrange() + 
  geom_hline(yintercept=0, colour="grey60", linetype=2) + 
  scale_x_discrete(limits=rev(perceptions_list_ordered)) +
  theme_bw() +
  xlab("") + ylab("") +
  coord_flip() + 
  theme(axis.text = element_text(size=18),
        strip.text.x = element_text(size=10),
        strip.text.y = element_text(size=14)) 




# Figure A10 ------------------------- 
  
#   Coecient plot for linear regressions showing the associations between perceptions
# and LPG ownership in individual regressions in Rajasthan households. Regressions control for
# covariates and district xed eects (not shown). Coecients are not exponentiated. Whiskers show
# 95% condence intervals. Stars show statistical signicance after Benjamini-Hochberg method for
# controlling for false discovery rate: * P<0.10, ** P<0.05, *** P<0.01.


perceptions_rajasthan <- c(
  
  "q145a_31 502. Chapati / Roti tastes good when made on Chulha , hence, we have to use it to cook chapatti's for the family ",              
  "q145a_32 503. Few traditional dishes like dalbati can be cooked only in chulha, so we have to keep a chulha at home ",                    
  "q145a_33 504. My in-laws / family members only like the food cooked on Chulha, they will not have food cooked in LPG ",                   
  "q145a_34 505. I use a chula because I just don’t have any other alternative ",                                                            
  "q145a_35 506. If my family allows, I would definitely go for other type of fuel which are fast and efficient ",                           
  "q145a_36 507. I have seen everyone in my family using firewood since my childhood and can't think of any other method of cooking ",       
  "q145a_37 508. Future generation will adopt LPG / other clean cooking options automatically ",                                             
  "q145a_38 509. The food cooked in chulha is good for health ",                                                                             
  "q145a_39 510. The smoke from the chulha affect the health of my kids. ",                                                                  
  "q145a_40 511. Using a chulha is extremely harmful for my eyes as well as for other family members ",                                      
  "q145a_41 512. We walk a very long distance to collect firewood; we often get back pain, shoulder and leg pain. ",                         
  "q145a_42 513. Cooking using LPG ia a much cleaner and healthier method of cooking ",                                                      
  "q145a_43 514. Women who uses LPG get more time for themselves",                                                                          
  "q145a_44 515. More time is spent on lighting the Chulha than even cooking food on it ",                                                   
  "q145a_45 516. There is not much variation in cooking time for chulha / sigri / LPG ",
  "q145a_1 502.One should definitely have a Chulla at kitchen",                                                                             
  "q145a_2 503.We have to start a day by lighting the chulha (traditional cookstove) for prosperity and wealth",                            
  "q145a_3 504.Smoke from chulha purifies the house you agr",                                                                               
  "q145a_4 505.I use firewood because cooking using firewood is part of our tradition and being used since generations",                    
  "q145a_5 506.Food tastes better if it has been prepared on chulha (traditional cookstove) specially fish, chicken and other non-veg items",
  "q145a_6 507.Elders / family members only prefer food cooked in Chulha",                                                                  
  "q145a_7 508.Cooking is all about making tasty food so that children’s and husband have food with satisfaction",                          
  "q145a_8 509.Cooking is trying different cuisine, experimenting new dishes . ",                                                           
  "q145a_9 510.I strictly follow all traditional practices e.g. getting dressed, the way I cook, or celebrating festivals etc.",            
  "q145a_10 511.Future generation will adopt LPG automatically.",                                                                          
  "q145a_11 512.Time taken to cook food in chulha is too much, we have to start our day early",                                             
  "q145a_12 513.Cleaning chulha every morning is difficult we have to collect the ash .",                                                   
  "q145a_13 514.Lighting the chulha (traditional cookstove) is the most difficult part of the cooking",                                     
  "q145a_14 515.Firewood is easily available, one can easily collect firewood from the premises / neighborhood ",                           
  "q145a_15 516.Continuous monitoring is required while cooking on firewood or else the fire extinguishes",                                 
  "q145a_16 517.During rains we don’t get firewood, we have to pay more for getting it ",                                                   
  "q145a_17 518.In times of  scarcity we have to travel long distances  to collect wood ",                                                  
  "q145a_18 519.While using firewood kitchen walls and utensils get spoiled completely ",                                                   
  "q145a_19 520.Cooking on LPG  is easy, food gets prepared soon, we could save time .",                                                    
  "q145a_20 521.We inhale a lot of smoke, sometimes we feel like suffocating due to smoke",                                                 
  "q145a_21 522.We also get irritations in eyes, headache, backache because of using firewood ",                                            
  "q145a_22 523.Our child’s health get affected due to smoke",                                                                              
  "q145a_23 524.We don’t face the issue of smoke while cooking on LPG/induction .",                                                         
  "q145a_24 525.I prefer not to tell my husband or anyone in family, even if I am not well",                                                
  "q145a_25 526.When children are not well, it is better to visit the doctors/health care professional immidiately. ",                      
  "q145a_26 527.We can always rely on the advices given by doctors/medical professionals",                                                  
  "q145a_27 528.I get free firewood that is the reason I’m using firewood .",                                                               
  "q145a_28 529.I can not afford LPG cylinder/Induction stove at all",                                                                      
  "q145a_29 530.I can not afford regular cooking on LPG cylinder/Induction stove",                                                          
  "q145a_30 531.If we have money, it is better to buy LPG instead of wood"
  
)


p_r_lpg_reg_table <- data.frame(matrix(NA, 1, 7))
colnames(p_r_lpg_reg_table) <- c("term", "estimate", "std.error", "statistic", "p.value","i","perception")


for (i in 0:49) {
  
  possibleError <- tryCatch(
    no_perception <- ifelse(is.na(unlist(rajasthan[1,1315+i]))==TRUE, 1, 0)
  )
  
  if(possibleError!=1) {
    
    p <- lm(Owns_LPG ~ unlist(rajasthan[,1315+i]) +
              Exp_log + NumberAdults + NumberChildrenUnder18 +
              AgeRespondent + 
              Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
              Religion_Muslim + Religion_Christian + Religion_Other + 
              Rural + factor(District), data = rajasthan)
    tidyp <- tidy(p)
    tidyp$i <- i
    tidyp$perception <- perceptions_rajasthan[i]
    p_r_lpg_reg_table <- rbind(p_r_lpg_reg_table, tidyp[2,])
    
  }
}

p_r_lpg_reg_table <- p_r_lpg_reg_table[-1,]

p_r_lpg_reg_table$p.value.bh <- p.adjust(p_r_lpg_reg_table$p.value, method = "BH", n = length(p_r_lpg_reg_table$p.value))
p_r_lpg_reg_table$p.value.bh.star <- ifelse(p_r_lpg_reg_table$p.value.bh<0.01, "***",
                                        ifelse(p_r_lpg_reg_table$p.value.bh<0.05, "**",
                                               ifelse(p_r_lpg_reg_table$p.value.bh<0.10, "*","")))


# NOTE: Order of perceptions and formatting is off from manuscript version but the estimates are correct.

ggplot(p_r_lpg_reg_table, aes(x=factor(perception), y=estimate,
                          ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, label=p.value.bh.star)) + 
  geom_point() +
  geom_text(aes(label=p.value.bh.star, x=perception, y=((estimate-1.96*std.error)-0.01)), vjust=0.75, size = 6) +
  geom_pointrange() + 
  geom_hline(yintercept=0, colour="grey60", linetype=2) + 
  theme_bw() +
  xlab("") + ylab("") +
  ggtitle("Rajasthan individual perceptions") +
  coord_flip() + 
  theme(axis.text = element_text(size=15),
        strip.text.x = element_text(size=10),
        strip.text.y = element_text(size=14)) 








# Figure A11 --------------------------

# Coecient plot for linear regressions showing the associations between education
# level achieved for both the chief wage earner and females and individual perceptions in Kerala
# households. Regressions control for covariates and district xed eects (not shown). Coecients
# are not exponentiated. Whiskers show 95% condence intervals. Stars show statistical signicance
# after Benjamini-Hochberg method for controlling for false discovery rate: * P<0.10, ** P<0.05,
# *** P<0.01.



perceptions_list <- c("Chulha in the kitchen is needed, especially during holidays and with guests", 
                      "Must light chulha to start the data, for prosperity and wealth", 
                      "Chulha smoke purifies the home", 
                      "Cooking with firewood is tradition", 
                      "Food tastes better cooked on the chulha", 
                      "Food cooked on the chulha is preferred by my family", 
                      "Cooking is all about making tasty food so my family has satisfaction", 
                      "Cooking is about experimenting with new dishes", 
                      "I follow all traditional practices strictly", 
                      "LPG Adoption will be automatic among the next generation", 
                      "Cooking with the chulha takes a long time",
                      "Cleaning the chulha takes a long time", 
                      "Lighting the chulha is the hardest part of cooking", 
                      "Firewood is easily available", 
                      "I have to continuously monitor cooking with firewood", 
                      "Firewood costs more during the rainy seasons", 
                      "When firewood is scarce, we have to travel further to get it", 
                      "Smoke spoils the walls and utensils",
                      "Cooking with LPG is easy, it takes less time", 
                      "We inhale a lot of smoke, sometimes it is suffocating",
                      "Sometimes smoke irritates the eyes, gives headache, and backache", 
                      "Smoke affects our children's health", 
                      "With LPG we don't have smoke issues", 
                      "I don't tell people if I'm sick",
                      "When the kids are sick, we take them to the doctor", 
                      "We trust the advice of doctors", 
                      "Firewood is used because it is free", 
                      "LPG or induction is unnafordable entirely",
                      "LPG cylinders are costly", 
                      "If I have the money, LPG cooking is better")

p_reg_table <- data.frame(matrix(NA, 1, 6))
colnames(p_reg_table) <- c("term", "estimate", "std.error", "statistic", "p.value", "perception")

for (i in 1:30) {
  
  p <- lm(unlist(kerala[,1175+i]) ~ Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
            Exp_log + NumberAdults + NumberChildrenUnder18 +
            AgeRespondent + 
            Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
            Religion_Muslim + Religion_Christian + Religion_Other + 
            Rural + 
            factor(District), data = kerala)
  tidyp <- tidy(p)
  tidyp$perception <- perceptions_list[i]
  p_reg_table <- rbind(p_reg_table, tidyp[2:4,])
  
  f <- lm(unlist(kerala[,1175+i]) ~ Education_female_primary + Education_female_middle_high + Education_female_greaterhigh +
            Exp_log + NumberAdults + NumberChildrenUnder18 +
            AgeRespondent + 
            Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
            Religion_Muslim + Religion_Christian + Religion_Other + 
            Rural + 
            factor(District), data = kerala)
  tidyf <- tidy(f)
  tidyf$perception <- perceptions_list[i]
  p_reg_table <- rbind(p_reg_table, tidyf[2:4,])
  
}

p_reg_table <- p_reg_table[-1,]

p_reg_table$education <- c(rep("CWE",3), rep("Female", 3))

p_reg_table[p_reg_table=="Education_CWE_primary"] <- "Primary School"
p_reg_table[p_reg_table=="Education_CWE_middle_high"] <- "Middle and High School"
p_reg_table[p_reg_table=="Education_CWE_greaterhigh"] <- "Greather than High School"

p_reg_table[p_reg_table=="Education_female_primary"] <- "Primary School"
p_reg_table[p_reg_table=="Education_female_middle_high"] <- "Middle and High School"
p_reg_table[p_reg_table=="Education_female_greaterhigh"] <- "Greather than High School"


p_reg_table$p.value.bh <- p.adjust(p_reg_table$p.value, method = "BH", n = length(p_reg_table$p.value))
p_reg_table$p.value.bh.star <- ifelse(p_reg_table$p.value.bh<0.01, "***",
                                      ifelse(p_reg_table$p.value.bh<0.05, "**",
                                             ifelse(p_reg_table$p.value.bh<0.10, "*","")))


ggplot(p_reg_table, aes(x=perception, y=estimate,
                        ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, label=p.value.bh.star,
                        shape=term)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  geom_text(aes(label=p.value.bh.star, y=(estimate - 2.25*std.error), x=perception), 
            vjust=.7, position = position_dodge(width = .5), size = 7) +
  geom_hline(yintercept=0, colour="grey60", linetype=2) + 
  theme_bw() +
  xlab("") + ylab("") +
  ggtitle("Kerala Perceptions and Education") +
  coord_flip() + 
  theme(legend.position = c(.625,.96),
        legend.box.background = element_rect(colour = "black"),
        legend.title = element_blank(),
        legend.text = element_text(size=15),
        plot.title = element_text(size=18),
        axis.text = element_text(size=15),
        strip.text.x = element_text(size=16),
        strip.text.y = element_text(size=14)) + 
  facet_grid(.~education)







# Figure A12 --------------------------
  
#   Coecient plot for linear regressions showing the associations between education
# level achieved for both the chief wage earner and females and individual perceptions in Rajasthan
# households. Regressions control for covariates and district xed eects (not shown). Coecients
# are not exponentiated. Whiskers show 95% condence intervals. Stars show statistical signicance
# after Benjamini-Hochberg method for controlling for false discovery rate: * P<0.10, ** P<0.05,
# *** P<0.01.



f_r_lpg_reg_table <- data.frame(matrix(NA, 1, 8))
colnames(f_r_lpg_reg_table) <- c("term", "estimate", "std.error", "statistic", "p.value","i","perception", "model")


for (i in 0:49) {
  
  possibleError <- tryCatch(
    no_perception <- ifelse(is.na(unlist(rajasthan[1,1315+i]))==TRUE, 1, 0)
  )
  
  if(possibleError!=1) {
    
    f <- lm(unlist(rajasthan[,1315+i]) ~ Education_4_FemaleSpouse_primary + Education_4_FemaleSpouse_middle_high + Education_4_FemaleSpouse_greaterhigh +
              Exp_log + NumberAdults + NumberChildrenUnder18 +
              AgeRespondent + 
              Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
              Religion_Muslim + Religion_Christian + Religion_Other + 
              Rural + factor(District), data = rajasthan)
    tidyf <- tidy(f)
    tidyf$i <- i
    tidyf$perception <- perceptions_rajasthan[i]
    tidyf$model <- "Female Respondent"
    f_r_lpg_reg_table <- rbind(f_r_lpg_reg_table, tidyf[2:4,])
    
    c <- lm(unlist(rajasthan[,1315+i]) ~ Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
              Exp_log + NumberAdults + NumberChildrenUnder18 +
              AgeRespondent + 
              Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
              Religion_Muslim + Religion_Christian + Religion_Other + 
              Rural + factor(District), data = rajasthan)
    
    tidyc <- tidy(c)
    tidyc$i <- i
    tidyc$perception <- perceptions_rajasthan[i]
    tidyc$model <- "CWE"
    f_r_lpg_reg_table <- rbind(f_r_lpg_reg_table, tidyc[2:4,])
    
  }
}

f_r_lpg_reg_table <- f_r_lpg_reg_table[-1,]

f_r_lpg_reg_table[f_r_lpg_reg_table=="Education_CWE_primary"] <- "Primary School"
f_r_lpg_reg_table[f_r_lpg_reg_table=="Education_CWE_middle_high"] <- "Middle and High School"
f_r_lpg_reg_table[f_r_lpg_reg_table=="Education_CWE_greaterhigh"] <- "Greather than High School"

f_r_lpg_reg_table[f_r_lpg_reg_table=="Education_4_FemaleSpouse_primary"] <- "Primary School"
f_r_lpg_reg_table[f_r_lpg_reg_table=="Education_4_FemaleSpouse_middle_high"] <- "Middle and High School"
f_r_lpg_reg_table[f_r_lpg_reg_table=="Education_4_FemaleSpouse_greaterhigh"] <- "Greather than High School"


f_r_lpg_reg_table$p.value.bh <- p.adjust(f_r_lpg_reg_table$p.value, method = "BH", n = length(f_r_lpg_reg_table$p.value))
f_r_lpg_reg_table$p.value.bh.star <- ifelse(f_r_lpg_reg_table$p.value.bh<0.01, "***",
                                            ifelse(f_r_lpg_reg_table$p.value.bh<0.05, "**",
                                                   ifelse(f_r_lpg_reg_table$p.value.bh<0.10, "*","")))


ggplot(f_r_lpg_reg_table, aes(x=perception, y=estimate,
                          ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, label=p.value.bh.star,
                          shape=term)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  geom_text(aes(label=p.value.bh.star, x=perception, y=((estimate-1.96*std.error)-0.04)), position = position_dodge(width = .5), vjust=0.75, size = 6) +
  geom_hline(yintercept=0, colour="grey60", linetype=2) + 
  theme_bw() +
  xlab("") + ylab("") +
  ggtitle("Rajasthan Perceptions and Education") +
  coord_flip() + 
  theme(legend.position = c(-1,1.01),
        legend.box.background = element_rect(colour = "black"),
        legend.title = element_blank(),
        legend.text = element_text(size=15),
        plot.title = element_text(size=18),
        axis.text = element_text(size=16),
        strip.text.x = element_text(size=16),
        strip.text.y = element_text(size=14)) +
  facet_grid(.~model)






# Figure A13 -----------------------------

# Coecient plot for linear regressions showing the associations between (A) CWE and
# (B) female education and LPG ownership, with and without including all individual perceptions in
# Kerala households. Regressions control for covariates and district xed eects (not shown). Coef-
#   cients are not exponentiated. Whiskers show 95% condence intervals and stars show statistical
# signicance using Benjamini-Hochberg method for controlling for False Discovery Rate.



mediation_k_lpg_reg_table <- data.frame(matrix(NA, 1, 7))
colnames(mediation_k_lpg_reg_table) <- c("term", "estimate", "std.error", "statistic", "p.value","perception","model")


for (i in 1:30) {
  
  
  f <- lm(Owns_LPG ~ Education_female_primary + Education_female_middle_high + Education_female_greaterhigh +
            unlist(kerala[,1175+i]) +
            Exp_log + NumberAdults + NumberChildrenUnder18 +
            AgeRespondent + 
            Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
            Religion_Muslim + Religion_Christian + Religion_Other + 
            Rural + factor(District), data = kerala)
  tidyf <- tidy(f)
  tidyf$perception <- perceptions_list[i]
  tidyf$model <- "Female Respondent"
  mediation_k_lpg_reg_table <- rbind(mediation_k_lpg_reg_table, tidyf[2:4,])
  
  c <- lm(Owns_LPG ~ Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
            unlist(kerala[,1315+i]) +
            Exp_log + NumberAdults + NumberChildrenUnder18 +
            AgeRespondent + 
            Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
            Religion_Muslim + Religion_Christian + Religion_Other + 
            Rural + factor(District), data = kerala)
  
  tidyc <- tidy(c)
  tidyc$perception <- perceptions_list[i]
  tidyc$model <- "CWE"
  mediation_k_lpg_reg_table <- rbind(mediation_k_lpg_reg_table, tidyc[2:4,])
  
}


mediation_k_lpg_reg_table <- mediation_k_lpg_reg_table[-1,]
mediation_k_lpg_reg_table[mediation_k_lpg_reg_table=="Education_CWE_primary"] <- "Primary School"
mediation_k_lpg_reg_table[mediation_k_lpg_reg_table=="Education_CWE_middle_high"] <- "Middle and High School"
mediation_k_lpg_reg_table[mediation_k_lpg_reg_table=="Education_CWE_greaterhigh"] <- "Greather than High School"

mediation_k_lpg_reg_table[mediation_k_lpg_reg_table=="Education_female_primary"] <- "Primary School"
mediation_k_lpg_reg_table[mediation_k_lpg_reg_table=="Education_female_middle_high"] <- "Middle and High School"
mediation_k_lpg_reg_table[mediation_k_lpg_reg_table=="Education_female_greaterhigh"] <- "Greather than High School"


mediation_k_lpg_reg_table$p.value.bh <- p.adjust(mediation_k_lpg_reg_table$p.value, method = "BH", n = length(mediation_k_lpg_reg_table$p.value))
mediation_k_lpg_reg_table$p.value.bh.star <- ifelse(mediation_k_lpg_reg_table$p.value.bh<0.01, "***",
                                                    ifelse(mediation_k_lpg_reg_table$p.value.bh<0.05, "**",
                                                           ifelse(mediation_k_lpg_reg_table$p.value.bh<0.10, "*","")))



ggplot(mediation_k_lpg_reg_table, aes(x=perception, y=estimate,
                                      ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, label=p.value.bh.star,
                                      shape=term)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  geom_text(aes(label=p.value.bh.star, y=(estimate - 2.25*std.error), x=perception), vjust=.75, position = position_dodge(width = .5), size = 5) +
  geom_hline(yintercept=0, colour="grey60", linetype=2) + 
  theme_bw() +
  xlab("") + ylab("") +
  ggtitle("Mediation of Education-LPG by Perceptions in Kerala") +
  coord_flip() + 
  theme(legend.position = c(0.88,0.95),
        legend.box.background = element_rect(colour = "black"),
        legend.title = element_blank(),
        legend.text = element_text(size=13),
        plot.title = element_text(size=18),
        axis.text = element_text(size=15),
        strip.text.x = element_text(size=16),
        strip.text.y = element_text(size=14)) +
  facet_grid(.~model)






# Figure A14 -----------------------------

# Coecient plot for linear regressions showing the associations between (A) CWE and
# (B) female education and LPG ownership, with and without including all individual perceptions in
# Rajasthan households. Regressions control for covariates and district xed eects (not shown). Co-
#   ecients are not exponentiated. Whiskers show 95% condence intervals and stars show statistical
# signicance using Benjamini-Hochberg method for controlling for False Discovery Rate.



mediation_r_lpg_reg_table <- data.frame(matrix(NA, 1, 8))
colnames(mediation_r_lpg_reg_table) <- c("term", "estimate", "std.error", "statistic", "p.value","i","perception", "model")


for (i in 0:49) {
  
  possibleError <- tryCatch(
    no_perception <- ifelse(is.na(unlist(rajasthan[1,1315+i]))==TRUE, 1, 0)
  )
  
  if(possibleError!=1) {
    
    f <- lm(Owns_LPG ~ Education_4_FemaleSpouse_primary + Education_4_FemaleSpouse_middle_high + Education_4_FemaleSpouse_greaterhigh +
              unlist(rajasthan[,1315+i]) +
              Exp_log + NumberAdults + NumberChildrenUnder18 +
              AgeRespondent + 
              Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
              Religion_Muslim + Religion_Christian + Religion_Other + 
              Rural + factor(District), data = rajasthan)
    tidyf <- tidy(f)
    tidyf$i <- i
    tidyf$perception <- perceptions_rajasthan[i]
    tidyf$model <- "Female Respondent"
    mediation_r_lpg_reg_table <- rbind(mediation_r_lpg_reg_table, tidyf[2:4,])
    
    c <- lm(Owns_LPG ~ Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
              unlist(rajasthan[,1315+i]) +
              Exp_log + NumberAdults + NumberChildrenUnder18 +
              AgeRespondent + 
              Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
              Religion_Muslim + Religion_Christian + Religion_Other + 
              Rural + factor(District), data = rajasthan)
    
    tidyc <- tidy(c)
    tidyc$i <- i
    tidyc$perception <- perceptions_rajasthan[i]
    tidyc$model <- "CWE"
    mediation_r_lpg_reg_table <- rbind(mediation_r_lpg_reg_table, tidyc[2:4,])
    
  }
}

mediation_r_lpg_reg_table[mediation_r_lpg_reg_table=="Education_CWE_primary"] <- "Primary School"
mediation_r_lpg_reg_table[mediation_r_lpg_reg_table=="Education_CWE_middle_high"] <- "Middle and High School"
mediation_r_lpg_reg_table[mediation_r_lpg_reg_table=="Education_CWE_greaterhigh"] <- "Greather than High School"

mediation_r_lpg_reg_table[mediation_r_lpg_reg_table=="Education_4_FemaleSpouse_primary"] <- "Primary School"
mediation_r_lpg_reg_table[mediation_r_lpg_reg_table=="Education_4_FemaleSpouse_middle_high"] <- "Middle and High School"
mediation_r_lpg_reg_table[mediation_r_lpg_reg_table=="Education_4_FemaleSpouse_greaterhigh"] <- "Greather than High School"


mediation_r_lpg_reg_table$p.value.bh <- p.adjust(mediation_r_lpg_reg_table$p.value, method = "BH", n = length(mediation_r_lpg_reg_table$p.value))
mediation_r_lpg_reg_table$p.value.bh.star <- ifelse(mediation_r_lpg_reg_table$p.value.bh<0.01, "***",
                                                    ifelse(mediation_r_lpg_reg_table$p.value.bh<0.05, "**",
                                                           ifelse(mediation_r_lpg_reg_table$p.value.bh<0.10, "*","")))



ggplot(na.omit(mediation_r_lpg_reg_table), aes(x=perception, y=estimate,
                                      ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, label=p.value.bh.star,
                                      shape=term)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  geom_text(aes(label=p.value.bh.star, x=perception, y=((estimate-1.96*std.error)-0.01)), position = position_dodge(width = .5), vjust=0.7, size = 6) +
  geom_hline(yintercept=0, colour="grey60", linetype=2) + 
  theme_bw() +
  xlab("") + ylab("") +
  ggtitle("Mediation of Education-LPG by Perceptions in Rajasthan") +
  coord_flip() + 
  theme(legend.position = c(-1,1.01),
        legend.box.background = element_rect(colour = "black"),
        legend.title = element_blank(),
        legend.text = element_text(size=15),
        plot.title = element_text(size=18),
        axis.text = element_text(size=16),
        strip.text.x = element_text(size=16),
        strip.text.y = element_text(size=14)) +
  facet_grid(.~model)




# Figure A15 -----------------------

# Coecient plot for linear regressions showing the associations between number of years
# owning LPG and individual perceptions in Kerala households. Regressions control for covariates
# and district xed eects (not shown). Coecients are not exponentiated. Whiskers show 95%
# condence intervals and stars show statistical signicance using Benjamini-Hochberg method for
# controlling for False Discovery Rate.


pt_reg_table <- data.frame(matrix(NA, 1, 6))
colnames(pt_reg_table) <- c("term", "estimate", "std.error", "statistic", "p.value", "perception")

for (i in 1:30) {
  p <- lm(unlist(kerala[,1175+i]) ~ LPG_yearswith_25 + LPG_yearswith_510 + LPG_yearswith_10plus +
            Exp_log + NumberAdults + NumberChildrenUnder18 +
            AgeRespondent + 
            Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
            Religion_Muslim + Religion_Christian + Religion_Other + 
            Rural + factor(District), data = kerala)
  tidyp <- tidy(p)
  tidyp$perception <- perceptions_list[i]
  pt_reg_table <- rbind(pt_reg_table, tidyp[2:4,])
}

pt_reg_table <- pt_reg_table[-1,]

pt_reg_table[pt_reg_table=="LPG_yearswith_25"] <- "2-5 Years"
pt_reg_table[pt_reg_table=="LPG_yearswith_510"] <- "5-10 Years"
pt_reg_table[pt_reg_table=="LPG_yearswith_10plus"] <- ">10 Years"

pt_reg_table$p.value.bh <- p.adjust(pt_reg_table$p.value, method = "BH", n = length(pt_reg_table$p.value))
pt_reg_table$p.value.bh.star <- ifelse(pt_reg_table$p.value.bh<0.01, "***",
                                       ifelse(pt_reg_table$p.value.bh<0.05, "**",
                                              ifelse(pt_reg_table$p.value.bh<0.10, "*","")))

pt_reg_table$term <- factor(pt_reg_table$term, levels = c("2-5 Years", "5-10 Years", ">10 Years"))



ggplot(pt_reg_table, aes(x=factor(perception), y=estimate,
                         ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, label=p.value.bh.star,
                         shape=term)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  geom_text(aes(label=p.value.bh.star, y = estimate - 2.25*std.error, x = factor(perception)), position = position_dodge(width = .5),vjust=0.7, size = 6) +
  geom_hline(yintercept=0, colour="grey60", linetype=2) + 
  scale_x_discrete(limits=rev(perceptions_list_ordered)) +
  theme_bw() +
  xlab("") + ylab("") +
  coord_flip() + 
  theme(legend.title = element_blank(),
        legend.text = element_text(size=14),
        legend.position = c(0.14,0.94),
        axis.text = element_text(size=18),
        strip.text.x = element_text(size=10),
        strip.text.y = element_text(size=14)) 



# Figure A16 -----------------------

# Coecient plot for linear regressions showing the associations between number of
# years owning LPG and individual perceptions in Rajasthan households. Regressions control for
# covariates and district xed eects (not shown). Coecients are not exponentiated. Whiskers
# show 95% condence intervals and stars show statistical signicance using Benjamini-Hochberg
# method for controlling for False Discovery Rate.



rajasthan$LPG_yearswith_12 <- ifelse(rajasthan$LPG_yearswith==1, 1, 0)
rajasthan$LPG_yearswith_25 <- ifelse(rajasthan$LPG_yearswith==2, 1, 0)
rajasthan$LPG_yearswith_510 <- ifelse(rajasthan$LPG_yearswith==3, 1, 0)
rajasthan$LPG_yearswith_10plus <- ifelse(rajasthan$LPG_yearswith==4, 1, 0)
rajasthan$LPG_yearswith <- ifelse(rajasthan$LPG_yearswith==98, NA, rajasthan$LPG_yearswith)

pt_reg_table <- data.frame(matrix(NA, 1, 6))
colnames(pt_reg_table) <- c("term", "estimate", "std.error", "statistic", "p.value", "perception")

for (i in 0:49) {
  
  possibleError <- tryCatch(
    no_perception <- ifelse(is.na(unlist(rajasthan[1,1315+i]))==TRUE, 1, 0)
  )
  
  if(possibleError!=1) {
    p <- lm(unlist(rajasthan[,1315+i]) ~ LPG_yearswith_25 + LPG_yearswith_510 + LPG_yearswith_10plus +
              Exp_log + NumberAdults + NumberChildrenUnder18 +
              AgeRespondent + 
              Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
              Religion_Muslim + Religion_Christian + Religion_Other + 
              Rural + factor(District), data = rajasthan)
    tidyp <- tidy(p)
    tidyp$perception <- perceptions_rajasthan[i]
    pt_reg_table <- rbind(pt_reg_table, tidyp[2:4,])
  }
}

pt_reg_table <- pt_reg_table[-1,]

pt_reg_table[pt_reg_table=="LPG_yearswith_25"] <- "2-5 Years"
pt_reg_table[pt_reg_table=="LPG_yearswith_510"] <- "5-10 Years"
pt_reg_table[pt_reg_table=="LPG_yearswith_10plus"] <- ">10 Years"

pt_reg_table$p.value.bh <- p.adjust(pt_reg_table$p.value, method = "BH", n = length(pt_reg_table$p.value))
pt_reg_table$p.value.bh.star <- ifelse(pt_reg_table$p.value.bh<0.01, "***",
                                       ifelse(pt_reg_table$p.value.bh<0.05, "**",
                                              ifelse(pt_reg_table$p.value.bh<0.10, "*","")))

pt_reg_table$term <- factor(pt_reg_table$term, levels = c("2-5 Years", "5-10 Years", ">10 Years"))

ggplot(pt_reg_table, aes(x=factor(perception), y=estimate,
                         ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, label=p.value.bh.star,
                         shape=term)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  geom_text(aes(label=p.value.bh.star, x = perception, y=((estimate-1.96*std.error)-0.034)), position = position_dodge(width = .5),  vjust=0.7, size = 6) +
  geom_hline(yintercept=0, colour="grey60", linetype=2) + 
  theme_bw() +
  xlab("") + ylab("") +
  coord_flip() + 
  theme(legend.title = element_blank(),
        legend.text = element_text(size=14),
        legend.position = c(0.8,0.94),
        axis.text = element_text(size=18),
        strip.text.x = element_text(size=10),
        strip.text.y = element_text(size=14)) 






# Figure A17 -----------------


# Coecient plot for logistic regressions showing the association between CWE education
# and self-reported number of days to consume an LPG cylinder in Kerala and Rajasthan. Regressions
# control for covariates and district xed eects. Coecients are not exponentiated. Whiskers show
# 95% condence intervals. Negative coecients suggest that LPG is consumed faster and that over
# the course of a year a household consumes more LPG.



# district fixed effects
reg2_K <- lm(q126_1 ~ LPG_cylinder_cost_100 + 
               Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
               Exp_log + 
               NumberAdults + 
               NumberChildrenUnder18 +
               AgeRespondent + 
               Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
               Religion_Muslim + Religion_Christian + Religion_Other + 
               Rural, data=kerala)


# district fixed effects
reg2_R <- lm(q126_1 ~ LPG_cylinder_cost_100 +
               Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
               Exp_log + NumberAdults + 
               NumberChildrenUnder18 +
               AgeRespondent + 
               Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
               Religion_Muslim + Religion_Christian + Religion_Other + 
               Rural + 
               factor(District), data=rajasthan)


DistrictFE <- c("factor(District)2", "factor(District)3", "factor(District)4", "factor(District)5", 
                "factor(District)6", "factor(District)7", "factor(District)8", 
                "factor(District)10", "factor(District)11", "factor(District)12", "factor(District)13", 
                "factor(District)14", "factor(District)15", "factor(District)16", "factor(District)17",
                "factor(District)18", "factor(District)19", "factor(District)20", "factor(District)21",
                "factor(District)22", "factor(District)23", "factor(District)24", "factor(District)25",
                "factor(District)26", "factor(District)27", "factor(District)28")

reg12_df <- rbind(tidy(reg2_K) %>% mutate(model="Kerala"),
                  tidy(reg2_R) %>% mutate(model="Rajasthan")) %>%
  filter(!(term %in% DistrictFE)) %>%
  filter(!(term %in% "Caste_DKCS")) %>%
  relabel_predictors(c(q127a_1_100="LPG Cylinder Cost (Per 100 INR)",
                       Education_CWE_primary="Primary School",
                       Education_CWE_middle_high="Middle or High School",
                       Education_CWE_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward class",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))

reg12_df$p.value.bh <- p.adjust(reg12_df$p.value, method = "BH", n = length(reg12_df$p.value))
reg12_df$p.value.bh.star <- ifelse(reg12_df$p.value.bh<0.01, "***",
                                   ifelse(reg12_df$p.value.bh<0.05, "**",
                                          ifelse(reg12_df$p.value.bh<0.10, "*","")))

reg12_df[reg12_df=="Kerala"] <- "Kerala (With District FEs)"
reg12_df[reg12_df=="Rajasthan"] <- "Rajasthan (With District FEs)"


ggplot(reg12_df %>% filter(!(term=="(Intercept)")), aes(x=term, y=estimate,
                                                        ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, 
                                                        label=p.value.bh.star,
                                                        color=model, shape=model)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  scale_x_discrete(limits=rev(covs_order)) +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("grey50", "black")) +
  scale_shape_manual(values=c(16,15)) +
  geom_text(aes(label=p.value.bh.star, y=((estimate+1.96*std.error)+2.5)), 
            position = position_dodge(width = .5), vjust=.70, size = 6,
            show.legend = F) +
  ggtitle("Accounting for LPG Cylinder Cost in the Association between\n CWE Education and Days to Consume an LPG Cylinder") + 
  coord_flip() + 
  ylab("\nCoefficient Estimate") + xlab("") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.80,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=18),
        axis.title.x = element_text(size=22))






# Figure A18 ------------------
  
#   Coecient plot for logistic regressions showing the association between female edu-
#   cation and self-reported number of days to consume an LPG cylinder in Kerala and Rajasthan.
# Regressions control for covariates and district xed eects. Coecients are not exponentiated.
# Whiskers show 95% condence intervals. Negative coecients suggest that LPG is consumed faster
# and that over the course of a year a household consumes more LPG.


# female

reg2_f_K <- lm(q126_1 ~ LPG_cylinder_cost_100 +
                 Education_female_primary + Education_female_middle_high + Education_female_greaterhigh +
                 Exp_log + NumberAdults + NumberChildrenUnder18 +
                 AgeRespondent + 
                 Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                 Religion_Muslim + Religion_Christian + Religion_Other + 
                 Rural + factor(District), data = kerala)



reg2_f_R <- lm(q126_1 ~ LPG_cylinder_cost_100 +
                 Education_4_FemaleSpouse_primary + Education_4_FemaleSpouse_middle_high + Education_4_FemaleSpouse_greaterhigh +
                 Exp_log + NumberAdults + NumberChildrenUnder18 +
                 AgeRespondent + 
                 Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                 Religion_Muslim + Religion_Christian + Religion_Other + 
                 Rural + 
                 factor(District), data=rajasthan)



DistrictFE <- c("factor(District)2", "factor(District)3", "factor(District)4", "factor(District)5", 
                "factor(District)6", "factor(District)7", "factor(District)8", 
                "factor(District)10", "factor(District)11", "factor(District)12", "factor(District)13", 
                "factor(District)14", "factor(District)15", "factor(District)16", "factor(District)17",
                "factor(District)18", "factor(District)19", "factor(District)20", "factor(District)21",
                "factor(District)22", "factor(District)23", "factor(District)24", "factor(District)25",
                "factor(District)26", "factor(District)27", "factor(District)28")

reg12_df <- rbind(tidy(reg2_f_K) %>% mutate(model="Kerala"),
                  tidy(reg2_f_R) %>% mutate(model="Rajasthan")) %>%
  filter(!(term %in% DistrictFE)) %>%
  filter(!(term %in% "Caste_DKCS")) %>%
  relabel_predictors(c(LPG_cylinder_cost_100="LPG Cylinder Cost (Per 100 INR)",
                       Education_4_FemaleSpouse_primary="Primary School",
                       Education_4_FemaleSpouse_middle_high="Middle or High School",
                       Education_4_FemaleSpouse_greaterhigh="Greater than High School",
                       Education_female_primary="Primary School",
                       Education_female_middle_high="Middle or High School",
                       Education_female_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward class",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))

reg12_df$p.value.bh <- p.adjust(reg12_df$p.value, method = "BH", n = length(reg12_df$p.value))
reg12_df$p.value.bh.star <- ifelse(reg12_df$p.value.bh<0.01, "***",
                                   ifelse(reg12_df$p.value.bh<0.05, "**",
                                          ifelse(reg12_df$p.value.bh<0.10, "*","")))

reg12_df[reg12_df=="Kerala"] <- "Kerala (With District FEs)"
reg12_df[reg12_df=="Rajasthan"] <- "Rajasthan (With District FEs)"


ggplot(reg12_df %>% filter(!(term=="(Intercept)")), aes(x=term, y=estimate,
                                                        ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, 
                                                        label=p.value.bh.star,
                                                        color=model, shape=model)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  scale_x_discrete(limits=rev(covs_order)) +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("grey50", "black")) +
  scale_shape_manual(values=c(16,15)) +
  geom_text(aes(label=p.value.bh.star, y=((estimate+1.96*std.error)+2.5)), 
            position = position_dodge(width = .5), vjust=.70, size = 6,
            show.legend = F) +
  ggtitle("Association between Female Education and Days to Consume an LPG Cylinder") + 
  coord_flip() + 
  ylab("\nCoefficient Estimate") + xlab("") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = "none",
        axis.text = element_text(size=20),
        legend.text = element_text(size=18),
        axis.title.x = element_text(size=22),
        strip.text = element_text(size=20)) + 
  facet_grid(.~model)
  


# Figure A19 --------------------

# Coecient plot for logistic regressions showing the association between CWE education
# and LPG ownership n Kerala and Rajasthan, with the inclusion of district-level average LPG
# cylinder prices paid. Regressions control for covariates and district xed eects. Coecients are
# not exponentiated. Whiskers show 95% condence intervals.

covs_order_cost <- c("LPG Cylinder Cost (Per 100 INR)",
                "Primary School", "Middle or High School", "Greater than High School",
                "Total monthly household expenditure\n (Logarithmized)",
                "Number of adults (>=18 years)", "Number of children (<18 years)",
                "Age of respondent (years)", 
                "Other backward caste", "Scheduled caste", "Scheduled tribe", 
                "Muslim", "Christian", "Religion other",
                "Rural (=1)")


# district fixed effects
reg2_K <- glm(Owns_LPG ~ q127a_1_100_avg + 
                Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                Exp_log + 
                NumberAdults + 
                NumberChildrenUnder18 +
                AgeRespondent + 
                Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                Religion_Muslim + Religion_Christian + Religion_Other + 
                Rural, data=kerala_lpgcost, family = binomial("logit"), maxit = 100)


# district fixed effects
reg2_R <- glm(Owns_LPG ~ q127a_1_100_avg +
                Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                Exp_log + NumberAdults + 
                NumberChildrenUnder18 +
                AgeRespondent + 
                Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                Religion_Muslim + Religion_Christian + Religion_Other + 
                Rural + 
                factor(District), data=rajasthan_lpgcost, family = binomial("logit"), maxit = 100)


DistrictFE <- c("factor(District)2", "factor(District)3", "factor(District)4", "factor(District)5", 
                "factor(District)6", "factor(District)7", "factor(District)8", 
                "factor(District)10", "factor(District)11", "factor(District)12", "factor(District)13", 
                "factor(District)14", "factor(District)15", "factor(District)16", "factor(District)17",
                "factor(District)18", "factor(District)19", "factor(District)20", "factor(District)21",
                "factor(District)22", "factor(District)23", "factor(District)24", "factor(District)25",
                "factor(District)26", "factor(District)27", "factor(District)28")

reg12_df <- rbind(tidy(reg2_K) %>% mutate(model="Kerala"),
                  tidy(reg2_R) %>% mutate(model="Rajasthan")) %>%
  filter(!(term %in% DistrictFE)) %>%
  filter(!(term %in% "Caste_DKCS")) %>%
  relabel_predictors(c(q127a_1_100_avg="LPG Cylinder Cost (Per 100 INR)",
                       Education_CWE_primary="Primary School",
                       Education_CWE_middle_high="Middle or High School",
                       Education_CWE_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward class",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))

reg12_df$p.value.bh <- p.adjust(reg12_df$p.value, method = "BH", n = length(reg12_df$p.value))
reg12_df$p.value.bh.star <- ifelse(reg12_df$p.value.bh<0.01, "***",
                                   ifelse(reg12_df$p.value.bh<0.05, "**",
                                          ifelse(reg12_df$p.value.bh<0.10, "*","")))

reg12_df[reg12_df=="Kerala"] <- "Kerala (With District FEs)"
reg12_df[reg12_df=="Rajasthan"] <- "Rajasthan (With District FEs)"



ggplot(reg12_df %>% filter(!(term=="(Intercept)")), aes(x=term, y=estimate,
                                                        ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, 
                                                        label=p.value.bh.star,
                                                        color=model, shape=model)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  scale_x_discrete(limits=rev(covs_order_cost)) +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("grey50", "black")) +
  scale_shape_manual(values=c(16,15)) +
  geom_text(aes(label=p.value.bh.star, y=((estimate+1.96*std.error)+0.15)), 
            position = position_dodge(width = .5), vjust=.70, size = 6,
            show.legend = F) +
  ggtitle("Association between CWE Education and LPG Ownership") + 
  coord_flip() + 
  ylab("\nCoefficient Estimate") + xlab("") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.80,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=18),
        axis.title.x = element_text(size=22))




# Figure A20 --------------------

# Coecient plot for logistic regressions showing the association between CWE education
# and self-reported number of days to consume an LPG cylinder in Kerala and Rajasthan, after
# accounting for self-reported prices paid for the most recent LPG cylinder. Regressions control
# for covariates and district xed eects. Coecients are not exponentiated. Whiskers show 95%
# condence intervals. Negative coecients suggest that LPG is consumed faster and that over the
# course of a year a household consumes more LPG.

reg2_K <- lm(q126_1 ~ LPG_cylinder_cost_100 +
               Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
               Exp_log + 
               NumberAdults + 
               NumberChildrenUnder18 +
               AgeRespondent + 
               Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
               Religion_Muslim + Religion_Christian + Religion_Other + 
               Rural + 
               factor(District), data=kerala)

reg2_R <- lm(q126_1 ~ LPG_cylinder_cost_100 +
               Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
               Exp_log + NumberAdults + 
               NumberChildrenUnder18 +
               AgeRespondent + 
               Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
               Religion_Muslim + Religion_Christian + Religion_Other + 
               Rural + 
               factor(District), data=rajasthan)


DistrictFE <- c("factor(District)2", "factor(District)3", "factor(District)4", "factor(District)5", 
                "factor(District)6", "factor(District)7", "factor(District)8", 
                "factor(District)10", "factor(District)11", "factor(District)12", "factor(District)13", 
                "factor(District)14", "factor(District)15", "factor(District)16", "factor(District)17",
                "factor(District)18", "factor(District)19", "factor(District)20", "factor(District)21",
                "factor(District)22", "factor(District)23", "factor(District)24", "factor(District)25",
                "factor(District)26", "factor(District)27", "factor(District)28")

reg12_df <- rbind(tidy(reg2_K) %>% mutate(model="Kerala"),
                  tidy(reg2_R) %>% mutate(model="Rajasthan")) %>%
  filter(!(term %in% DistrictFE)) %>%
  filter(!(term %in% "Caste_DKCS")) %>%
  relabel_predictors(c(LPG_cylinder_cost_100="LPG Cylinder Cost (Per 100 INR)",
                       Education_CWE_primary="Primary School",
                       Education_CWE_middle_high="Middle or High School",
                       Education_CWE_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))

reg12_df$p.value.bh <- p.adjust(reg12_df$p.value, method = "BH", n = length(reg12_df$p.value))
reg12_df$p.value.bh.star <- ifelse(reg12_df$p.value.bh<0.01, "***",
                                   ifelse(reg12_df$p.value.bh<0.05, "**",
                                          ifelse(reg12_df$p.value.bh<0.10, "*","")))

reg12_df[reg12_df=="Kerala"] <- "Kerala (With District FEs)"
reg12_df[reg12_df=="Rajasthan"] <- "Rajasthan (With District FEs)"


ggplot(reg12_df %>% filter(!(term=="(Intercept)")), aes(x=term, y=estimate,
                                                        ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, 
                                                        label=p.value.bh.star,
                                                        color=model, shape=model)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  scale_x_discrete(limits=rev(covs_order_cost)) +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("grey50", "black")) +
  scale_shape_manual(values=c(16,15)) +
  geom_text(aes(label=p.value.bh.star, y=((estimate+1.96*std.error)+2.5)), 
            position = position_dodge(width = .5), vjust=.70, size = 6,
            show.legend = F) +
  ggtitle("Association between CWE Education and Days to Consume an LPG Cylinder") + 
  coord_flip() + 
  ylab("\nCoefficient Estimate") + xlab("") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.20,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=18),
        axis.title.x = element_text(size=22))




# Figure A21 ----------------------
  
#   Coecient plot for logistic regressions showing the association between female edu-
#   cation and self-reported number of days to consume an LPG cylinder in Kerala and Rajasthan,
# after accounting for self-reported prices paid for the most recent LPG cylinder. Regressions control
# for covariates and district xed eects. Coecients are not exponentiated. Whiskers show 95%
# condence intervals. Negative coecients suggest that LPG is consumed faster and that over the
# course of a year a household consumes more LPG.

reg2_f_K <- lm(q126_1 ~ LPG_cylinder_cost_100 +
                 Education_female_primary + Education_female_middle_high + Education_female_greaterhigh +
                 Exp_log + NumberAdults + NumberChildrenUnder18 +
                 AgeRespondent + 
                 Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                 Religion_Muslim + Religion_Christian + Religion_Other + 
                 Rural + factor(District), data = kerala)



reg2_f_R <- lm(q126_1 ~ LPG_cylinder_cost_100 + 
                 Education_4_FemaleSpouse_primary + Education_4_FemaleSpouse_middle_high + Education_4_FemaleSpouse_greaterhigh +
                 Exp_log + NumberAdults + NumberChildrenUnder18 +
                 AgeRespondent + 
                 Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                 Religion_Muslim + Religion_Christian + Religion_Other + 
                 Rural + 
                 factor(District), data=rajasthan)



DistrictFE <- c("factor(District)2", "factor(District)3", "factor(District)4", "factor(District)5", 
                "factor(District)6", "factor(District)7", "factor(District)8", 
                "factor(District)10", "factor(District)11", "factor(District)12", "factor(District)13", 
                "factor(District)14", "factor(District)15", "factor(District)16", "factor(District)17",
                "factor(District)18", "factor(District)19", "factor(District)20", "factor(District)21",
                "factor(District)22", "factor(District)23", "factor(District)24", "factor(District)25",
                "factor(District)26", "factor(District)27", "factor(District)28")

reg12_df <- rbind(tidy(reg2_f_K) %>% mutate(model="Kerala"),
                  tidy(reg2_f_R) %>% mutate(model="Rajasthan")) %>%
  filter(!(term %in% DistrictFE)) %>%
  filter(!(term %in% "Caste_DKCS")) %>%
  relabel_predictors(c(LPG_cylinder_cost_100 = "LPG Cylinder Cost (Per 100 INR)",
                       Education_4_FemaleSpouse_primary="Primary School",
                       Education_4_FemaleSpouse_middle_high="Middle or High School",
                       Education_4_FemaleSpouse_greaterhigh="Greater than High School",
                       Education_female_primary="Primary School",
                       Education_female_middle_high="Middle or High School",
                       Education_female_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))

reg12_df$p.value.bh <- p.adjust(reg12_df$p.value, method = "BH", n = length(reg12_df$p.value))
reg12_df$p.value.bh.star <- ifelse(reg12_df$p.value.bh<0.01, "***",
                                   ifelse(reg12_df$p.value.bh<0.05, "**",
                                          ifelse(reg12_df$p.value.bh<0.10, "*","")))

reg12_df[reg12_df=="Kerala"] <- "Kerala (With District FEs)"
reg12_df[reg12_df=="Rajasthan"] <- "Rajasthan (With District FEs)"



ggplot(reg12_df %>% filter(!(term=="(Intercept)")), aes(x=term, y=estimate,
                                                        ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, 
                                                        label=p.value.bh.star,
                                                        color=model, shape=model)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  scale_x_discrete(limits=rev(covs_order_cost)) +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("grey50", "black")) +
  scale_shape_manual(values=c(16,15)) +
  geom_text(aes(label=p.value.bh.star, y=((estimate+1.96*std.error)+2.5)), 
            position = position_dodge(width = .5), vjust=.70, size = 6,
            show.legend = F) +
  ggtitle("Association between Female Education and Days to Consume an LPG Cylinder") + 
  coord_flip(ylim = c(-15, 15)) + 
  ylab("\nCoefficient Estimate") + xlab("") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = "none",
        axis.text = element_text(size=20),
        legend.text = element_text(size=18),
        axis.title.x = element_text(size=22),
        strip.text = element_text(size=20)) +
  facet_grid(.~model)



# Figure A22 --------------------------

# Coecient plot for logistic regressions showing the association between CWE educa-
#   tion and LPG ownership in Kerala, below and above median logarithmized monthly expenditure
# households. Median monthly logarithmized montly expenditure in Kerala households was 8.64,
# which is equivalent to 5657 INR (86.83 USD). Regressions control for covariates and district xed
# eects. Coecients are not exponentiated. Whiskers show 95% condence intervals.


reg2_belowmedianincome <- glm(Owns_LPG ~ 
                                Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                                Exp_log + NumberAdults + NumberChildrenUnder18 +
                                AgeRespondent + 
                                Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                                Religion_Muslim + Religion_Christian + Religion_Other + 
                                Rural + 
                                factor(District), data=kerala_belowmedian_income, family="binomial")

reg2_abovemedianincome <- glm(Owns_LPG ~ 
                                Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                                Exp_log + NumberAdults + NumberChildrenUnder18 +
                                AgeRespondent + 
                                Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                                Religion_Muslim + Religion_Christian + Religion_Other + 
                                Rural + 
                                factor(District), data=kerala_abovemedian_income, family="binomial")

reg12_df_medianincome <- rbind(tidy(reg2_belowmedianincome) %>% mutate(model="Below Median Expenditures (with District FE)"),
                               tidy(reg2_abovemedianincome) %>% mutate(model="Above Median Expenditures (with District FE)")) %>%
  filter(!(term %in% DistrictFE)) %>%
  relabel_predictors(c(Education_CWE_primary="Primary School",
                       Education_CWE_middle_high="Middle or High School",
                       Education_CWE_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))

reg12_df_medianincome$p.value.bh <- p.adjust(reg12_df_medianincome$p.value, method = "BH", n = length(reg12_df_medianincome$p.value))
reg12_df_medianincome$p.value.bh.star <- ifelse(reg12_df_medianincome$p.value.bh<0.01, "***",
                                                ifelse(reg12_df_medianincome$p.value.bh<0.05, "**",
                                                       ifelse(reg12_df_medianincome$p.value.bh<0.10, "*","")))

ggplot(reg12_df_medianincome %>% filter(!(term=="(Intercept)")), aes(x=term, y=estimate,
                                                                     ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, 
                                                                     label=p.value.bh.star,
                                                                     color=model, shape=model)) + 
  geom_point(position = position_dodge(width = .7), size=3) +
  geom_errorbar(position = position_dodge(width = .7), size=1.5, width = 0) + 
  scale_x_discrete(limits=rev(covs_order)) +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("grey70", "grey30", "black")) +
  scale_shape_manual(values=c(16,15,14)) +
  geom_text(aes(label=p.value.bh.star, y=((estimate+1.96*std.error)+0.2)), 
            position = position_dodge(width = .5), vjust=.70, size = 6,
            show.legend = F) +
  ggtitle("Association between CWE Education and LPG Ownership", 
          subtitle = "By Level of Monthly Expenditures, Kerala Households") +
  coord_flip(ylim = c(-3,3)) + 
  ylab("\nCoefficient Estimate") + xlab("") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.2,0.90),
        axis.text = element_text(size=20),
        legend.text = element_text(size=18),
        axis.title.x = element_text(size=22),
        strip.text = element_text(size=20))






# Figure A23 --------------------------

# Coecient plot for logistic regressions showing the association between CWE education
# and LPG ownership in Kerala households, separated by urban and rural households. Regressions
# control for covariates and district xed eects. Coecients are not exponentiated. Whiskers show
# 95% condence intervals.



reg2_rural <- glm(Owns_LPG ~ Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                    Exp_log + NumberAdults + NumberChildrenUnder18 +
                    AgeRespondent + 
                    Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                    Religion_Muslim + Religion_Christian + Religion_Other + 
                    Rural + 
                    factor(District), data=kerala_rural, family="binomial")

reg2_urban <- glm(Owns_LPG ~  Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                    Exp_log + NumberAdults + NumberChildrenUnder18 +
                    AgeRespondent + 
                    Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                    Religion_Muslim + Religion_Christian + Religion_Other + 
                    Rural + 
                    factor(District), data=kerala_urban, family="binomial")

reg12_df_rural <- rbind(tidy(reg2_rural) %>% mutate(model="Rural (with District FE)"),
                        tidy(reg2_urban) %>% mutate(model="Urban (with District FE)")) %>%
  filter(!(term %in% DistrictFE)) %>%
  relabel_predictors(c(Education_CWE_primary="Primary School",
                       Education_CWE_middle_high="Middle or High School",
                       Education_CWE_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))



dwplot(reg12_df_rural, dodge_size=0.5, dot_args = list(aes(shape=model), size=4), whisker_args = list(aes(linetype=model), size=1.5)) + 
  theme_gray() + xlab("\nCoefficient Estimate") + ylab("") +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("black", "grey50"),
                     labels=c("Rural (with District FE)", 
                              "Urban (with District FE)")) +
  scale_shape_manual(values=c(16,15),
                     labels=c("Rural (with District FE)", 
                              "Urban (with District FE)")) +
  ggtitle("Association between CWE Education and LPG Ownership", 
          subtitle = "By Rural or Urban, Kerala Households") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.20,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=15),
        axis.title.x = element_text(size=22)) + 
  coord_cartesian(xlim=c(-3,3))



# Figure A24 --------------------------

# Coecient plot for logistic regressions showing the association between female educa-
#   tion level achieved and LPG ownership in Kerala households, separated by below and above median
# income households. Regressions control for covariates and district xed eects. Coecients are not
# exponentiated. Whiskers show 95% condence intervals.


reg2_belowmedianincome_f <- glm(Owns_LPG ~
                                  Education_female_primary + Education_female_middle_high + Education_female_greaterhigh +
                                  Exp_log + NumberAdults + NumberChildrenUnder18 +
                                  AgeRespondent +
                                  Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                                  Religion_Muslim + Religion_Christian + Religion_Other +
                                  Rural, data=kerala_belowmedian_income, family="binomial")

reg2_abovemedianincome_f <- glm(Owns_LPG ~ 
                                  Education_female_primary + Education_female_middle_high + Education_female_greaterhigh +
                                  Exp_log + NumberAdults + NumberChildrenUnder18 +
                                  AgeRespondent + 
                                  Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                                  Religion_Muslim + Religion_Christian + Religion_Other + 
                                  Rural + 
                                  factor(District), data=kerala_abovemedian_income, family="binomial")

reg12_df_f_medianincome <- rbind(tidy(reg2_belowmedianincome_f) %>% mutate(model="Below Median Income (with District FE)"),
                                 tidy(reg2_abovemedianincome_f) %>% mutate(model="Above Median Income (with District FE)")) %>%
  filter(!(term %in% DistrictFE)) %>%
  relabel_predictors(c(Education_female_primary="Primary School",
                       Education_female_middle_high="Middle or High School",
                       Education_female_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))


dwplot(reg12_df_f_medianincome, dodge_size=0.5, dot_args = list(aes(shape=model), size=4), whisker_args = list(aes(linetype=model), size=1.5)) + 
  theme_gray() + xlab("\nCoefficient Estimate") + ylab("") +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("black", "grey50"),
                     labels=c("Below Median Income (with District FE)", 
                              "Above Median Income (with District FE)")) +
  scale_shape_manual(values=c(16,15),
                     labels=c("Below Median Income (with District FE)", 
                              "Above Median Income (with District FE)")) +
  ggtitle("Association between Female Education and LPG Ownership", 
          subtitle = "By Above or Below Median Income, Kerala Households") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.20,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=15),
        axis.title.x = element_text(size=22))






# Figure A25 --------------------------

# Coecient plot for logistic regressions showing the association between female ed-
#   ucation level achieved and LPG ownership in Kerala households, separated by urban and rural
# households. Regressions control for covariates and district xed eects. Coecients are not expo-
#   nentiated. Whiskers show 95% condence intervals.


reg2_rural_f <- glm(Owns_LPG ~ Education_female_primary + Education_female_middle_high + Education_female_greaterhigh +
                      Exp_log + NumberAdults + NumberChildrenUnder18 +
                      AgeRespondent + 
                      Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                      Religion_Muslim + Religion_Christian + Religion_Other + 
                      Rural + 
                      factor(District), data=kerala_rural, family="binomial")

reg2_urban_f <- glm(Owns_LPG ~  Education_female_primary + Education_female_middle_high + Education_female_greaterhigh +
                      Exp_log + NumberAdults + NumberChildrenUnder18 +
                      AgeRespondent + 
                      Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                      Religion_Muslim + Religion_Christian + Religion_Other + 
                      Rural + 
                      factor(District), data=kerala_urban, family="binomial")

reg12_df_rural_f <- rbind(tidy(reg2_rural_f) %>% mutate(model="Rural (with District FE)"),
                          tidy(reg2_urban_f) %>% mutate(model="Urban (with District FE)")) %>%
  filter(!(term %in% DistrictFE)) %>%
  relabel_predictors(c(Education_female_primary="Primary School",
                       Education_female_middle_high="Middle or High School",
                       Education_female_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))



dwplot(reg12_df_rural_f, dodge_size=0.5, dot_args = list(aes(shape=model), size=4), whisker_args = list(aes(linetype=model), size=1.5)) + 
  theme_gray() + xlab("\nCoefficient Estimate") + ylab("") +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("black", "grey50"),
                     labels=c("Rural (with District FE)", 
                              "Urban (with District FE)")) +
  scale_shape_manual(values=c(16,15),
                     labels=c("Rural (with District FE)", 
                              "Urban (with District FE)")) +
  ggtitle("Association between Female Education and LPG Ownership", 
          subtitle = "By Rural or Urban, Kerala Households") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.20,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=15),
        axis.title.x = element_text(size=22)) + 
  coord_cartesian(xlim=c(-3,3))










# Figure A26 --------------------------

# Coecient plot for logistic regressions showing the association between CWE educa-
#   tion and LPG ownership in Rajasthan households, separated by above and below median (loga
# rithmized) monthly household expenditures. Median household expenditure was 5450 INR (83.65
# USD). Regressions control for covariates and district xed eects. Coecients are not exponentiated. 
# Whiskers show 95% condence intervals.



reg2_belowmedianincome <- glm(Owns_LPG ~ 
                                Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                                Exp_log + NumberAdults + NumberChildrenUnder18 +
                                AgeRespondent + 
                                Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                                Religion_Muslim + Religion_Christian + Religion_Other + 
                                Rural + 
                                factor(District), data=rajasthan_belowmedian_income, family="binomial")

reg2_abovemedianincome <- glm(Owns_LPG ~ 
                                Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                                Exp_log + NumberAdults + NumberChildrenUnder18 +
                                AgeRespondent + 
                                Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                                Religion_Muslim + Religion_Christian + Religion_Other + 
                                Rural + 
                                factor(District), data=rajasthan_abovemedian_income, family="binomial")

reg12_df_medianincome <- rbind(tidy(reg2_belowmedianincome) %>% mutate(model="Below Median Income (with District FE)"),
                               tidy(reg2_abovemedianincome) %>% mutate(model="Above Median Income (with District FE)")) %>%
  filter(!(term %in% DistrictFE)) %>%
  relabel_predictors(c(Education_CWE_primary="Primary School",
                       Education_CWE_middle_high="Middle or High School",
                       Education_CWE_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))


dwplot(reg12_df_medianincome, dodge_size=0.5, dot_args = list(aes(shape=model), size=4), whisker_args = list(aes(linetype=model), size=1.5)) + 
  theme_gray() + xlab("\nCoefficient Estimate") + ylab("") +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("black", "grey50"),
                     labels=c("Above Median Income (with District FE)", 
                              "Below Median Income (with District FE)")) +
  scale_shape_manual(values=c(16,15),
                     labels=c("Above Median Income (with District FE)", 
                              "Below Median Income (with District FE)")) +
  ggtitle("Association between CWE Education and LPG Ownership", 
          subtitle = "By Above or Below Median Income, Rajasthan Households") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.20,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=18),
        axis.title.x = element_text(size=22)) + 
  coord_cartesian(xlim = c(-3,3))











# Figure A27 --------------------------

# Coecient plot for logistic regressions showing the association between female educa-
#   tion and LPG ownership in Rajasthan households, separated by above and below median income
# households. Regressions control for covariates and district xed eects. Coecients are not expo-
#   nentiated. Whiskers show 95% condence intervals.



reg2_belowmedianincome_f <- glm(Owns_LPG ~
                                  Education_4_FemaleSpouse_primary + Education_4_FemaleSpouse_middle_high + Education_4_FemaleSpouse_greaterhigh +
                                  Exp_log + NumberAdults + NumberChildrenUnder18 +
                                  AgeRespondent +
                                  Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                                  Religion_Muslim + Religion_Christian + Religion_Other +
                                  Rural, data=rajasthan_belowmedian_income, family="binomial")

reg2_abovemedianincome_f <- glm(Owns_LPG ~ 
                                  Education_4_FemaleSpouse_primary + Education_4_FemaleSpouse_middle_high + Education_4_FemaleSpouse_greaterhigh +
                                  Exp_log + NumberAdults + NumberChildrenUnder18 +
                                  AgeRespondent + 
                                  Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                                  Religion_Muslim + Religion_Christian + Religion_Other + 
                                  Rural + 
                                  factor(District), data=rajasthan_abovemedian_income, family="binomial")

reg12_df_f_medianincome <- rbind(tidy(reg2_belowmedianincome_f) %>% mutate(model="Below Median Income (with District FE)"),
                                 tidy(reg2_abovemedianincome_f) %>% mutate(model="Above Median Income (with District FE)")) %>%
  filter(!(term %in% DistrictFE)) %>%
  relabel_predictors(c(Education_4_FemaleSpouse_primary="Primary School",
                       Education_4_FemaleSpouse_middle_high="Middle or High School",
                       Education_4_FemaleSpouse_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))


dwplot(reg12_df_f_medianincome, dodge_size=0.5, dot_args = list(aes(shape=model), size=4), whisker_args = list(aes(linetype=model), size=1.5)) + 
  theme_gray() + xlab("\nCoefficient Estimate") + ylab("") +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("black", "grey50"),
                     labels=c("Below Median Income (with District FE)", 
                              "Above Median Income (with District FE)")) +
  scale_shape_manual(values=c(16,15),
                     labels=c("Below Median Income (with District FE)", 
                              "Above Median Income (with District FE)")) +
  ggtitle("Association between Female Education and LPG Ownership", 
          subtitle = "By Above or Below Median Income, Rajasthan Households") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.20,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=15),
        axis.title.x = element_text(size=22)) + 
  coord_cartesian(xlim = c(-3,3))







# Figure A28 --------------------------

# Coecient plot for logistic regressions showing the association between CWE education
# and LPG ownership in Rajasthan households, separated by urban and rural households. Regressions
# control for covariates and district xed eects. Coecients are not exponentiated. Whiskers show
# 95% condence intervals.



reg2_rural <- glm(Owns_LPG ~ Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                    Exp_log + NumberAdults + NumberChildrenUnder18 +
                    AgeRespondent + 
                    Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                    Religion_Muslim + Religion_Christian + Religion_Other + 
                    Rural + 
                    factor(District), data=rajasthan_rural, family="binomial")

reg2_urban <- glm(Owns_LPG ~  Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                    Exp_log + NumberAdults + NumberChildrenUnder18 +
                    AgeRespondent + 
                    Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                    Religion_Muslim + Religion_Christian + Religion_Other + 
                    Rural + 
                    factor(District), data=rajasthan_urban, family="binomial")

reg12_df_rural <- rbind(tidy(reg2_rural) %>% mutate(model="Rural (with District FE)"),
                        tidy(reg2_urban) %>% mutate(model="Urban (with District FE)")) %>%
  filter(!(term %in% DistrictFE)) %>%
  relabel_predictors(c(Education_CWE_primary="Primary School",
                       Education_CWE_middle_high="Middle or High School",
                       Education_CWE_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))


dwplot(reg12_df_rural, dodge_size=0.5, dot_args = list(aes(shape=model), size=4), whisker_args = list(aes(linetype=model), size=1.5)) + 
  theme_gray() + xlab("\nCoefficient Estimate") + ylab("") +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("black", "grey50"),
                     labels=c("Rural (with District FE)", 
                              "Urban (with District FE)")) +
  scale_shape_manual(values=c(16,15),
                     labels=c("Rural (with District FE)", 
                              "Urban (with District FE)")) +
  ggtitle("Association between CWE Education and LPG Ownership", 
          subtitle = "By Rural or Urban, Rajasthan Households") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.20,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=18),
        axis.title.x = element_text(size=22)) + 
  coord_cartesian(xlim=c(-2,2))




# Figure A29 --------------------------

# Coecient plot for logistic regressions showing the association between female ed-
#   ucation and LPG ownership in Rajasthan households, separated by urban and rural households.
# Regressions control for covariates and district xed eects. Coecients are not exponentiated.
# Whiskers show 95% condence intervals.


reg2_rural_f <- glm(Owns_LPG ~ Education_4_FemaleSpouse_primary + Education_4_FemaleSpouse_middle_high + Education_4_FemaleSpouse_greaterhigh +
                      Exp_log + NumberAdults + NumberChildrenUnder18 +
                      AgeRespondent + 
                      Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                      Religion_Muslim + Religion_Christian + Religion_Other + 
                      Rural + 
                      factor(District), data=rajasthan_rural, family="binomial")

reg2_urban_f <- glm(Owns_LPG ~  Education_4_FemaleSpouse_primary + Education_4_FemaleSpouse_middle_high + Education_4_FemaleSpouse_greaterhigh +
                      Exp_log + NumberAdults + NumberChildrenUnder18 +
                      AgeRespondent + 
                      Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                      Religion_Muslim + Religion_Christian + Religion_Other + 
                      Rural + 
                      factor(District), data=rajasthan_urban, family="binomial")

reg12_df_rural_f <- rbind(tidy(reg2_rural_f) %>% mutate(model="Rural (with District FE)"),
                          tidy(reg2_urban_f) %>% mutate(model="Urban (with District FE)")) %>%
  filter(!(term %in% DistrictFE)) %>%
  relabel_predictors(c(Education_4_FemaleSpouse_primary="Primary School",
                       Education_4_FemaleSpouse_middle_high="Middle or High School",
                       Education_4_FemaleSpouse_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))

dwplot(reg12_df_rural_f, dodge_size=0.5, dot_args = list(aes(shape=model), size=4), whisker_args = list(aes(linetype=model), size=1.5)) + 
  theme_gray() + xlab("\nCoefficient Estimate") + ylab("") +
  geom_vline(xintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("black", "grey50"),
                     labels=c("Rural (with District FE)", 
                              "Urban (with District FE)")) +
  scale_shape_manual(values=c(16,15),
                     labels=c("Rural (with District FE)", 
                              "Urban (with District FE)")) +
  ggtitle("Association between Female Education and LPG Ownership", 
          subtitle = "By Rural or Urban, Rajasthan Households") +
  theme_bw() + 
  theme(plot.title=element_text(size=22),
        plot.subtitle = element_text(size=18),
        legend.title = element_blank(),
        legend.position = c(0.20,0.95),
        axis.text = element_text(size=20),
        legend.text = element_text(size=15),
        axis.title.x = element_text(size=22)) + 
  coord_cartesian(xlim=c(-3,3))






# Figure A30 --------------------------


# Coecient plot for logistic regressions showing the association between CWE education
# and LPG ownership in a combined dataset of Kerala and Rajasathan households, separated by
# urban or rural. Regressions control for covariates and district xed eects. Coecients are not
# exponentiated. Whiskers show 95% condence intervals.



merged_reg_rural <- glm(Owns_LPG ~ 
                          Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                          Exp_log + NumberAdults + NumberChildrenUnder18 +
                          AgeRespondent + 
                          Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                          Religion_Muslim + Religion_Christian + Religion_Other + 
                          factor(District), data=merged_rural, family="binomial")


merged_reg_urban <- glm(Owns_LPG ~ 
                          Education_CWE_primary + Education_CWE_middle_high + Education_CWE_greaterhigh +
                          Exp_log + NumberAdults + NumberChildrenUnder18 +
                          AgeRespondent + 
                          Caste_OBC + Caste_ScheduledCaste + Caste_ScheduledTribe + Caste_DKCS +
                          Religion_Muslim + Religion_Christian + Religion_Other +
                          factor(District), data=merged_urban, family="binomial")


DistrictFE <- c("factor(District)2", "factor(District)3", "factor(District)4", "factor(District)5", 
                "factor(District)6", "factor(District)7", "factor(District)8", 
                "factor(District)10", "factor(District)11", "factor(District)12", "factor(District)13", 
                "factor(District)14", "factor(District)15", "factor(District)16", "factor(District)17",
                "factor(District)18", "factor(District)19", "factor(District)20", "factor(District)21",
                "factor(District)22", "factor(District)23", "factor(District)24", "factor(District)25",
                "factor(District)26", "factor(District)27", "factor(District)28")

reg12_df <- rbind(tidy(merged_reg_rural) %>% mutate(model="Rural"),
                  tidy(merged_reg_urban) %>% mutate(model="Urban")) %>%
  filter(!(term %in% DistrictFE)) %>%
  filter(!(term %in% "Caste_DKCS")) %>%
  relabel_predictors(c(Education_CWE_primary="Primary School",
                       Education_CWE_middle_high="Middle or High School",
                       Education_CWE_greaterhigh="Greater than High School",
                       Exp_log="Total monthly household expenditure\n (Logarithmized)",
                       NumberAdults="Number of adults (>=18 years)",
                       NumberChildrenUnder18="Number of children (<18 years)",
                       AgeRespondent="Age of respondent (years)",
                       Caste_OBC="Other backward caste",
                       Caste_ScheduledCaste="Scheduled caste",
                       Caste_ScheduledTribe="Scheduled tribe",
                       Caste_DKCS="Caste DK/CS",
                       Religion_Muslim="Muslim",
                       Religion_Christian="Christian",
                       Religion_Other="Religion other",
                       Rural="Rural (=1)"))

reg12_df$p.value.bh <- p.adjust(reg12_df$p.value, method = "BH", n = length(reg12_df$p.value))
reg12_df$p.value.bh.star <- ifelse(reg12_df$p.value.bh<0.01, "***",
                                   ifelse(reg12_df$p.value.bh<0.05, "**",
                                          ifelse(reg12_df$p.value.bh<0.10, "*","")))


ggplot(reg12_df %>% filter(!(term=="Caste DK/CS")), aes(x=term, y=estimate,
                                                        ymin=estimate - 1.96*std.error, ymax = estimate + 1.96*std.error, 
                                                        label=p.value.bh.star,
                                                        shape=model, color=model)) + 
  geom_point(position = position_dodge(width = .5), size=3) +
  geom_errorbar(position = position_dodge(width = .5), size=1.5, width = 0) + 
  geom_text(aes(label=p.value.bh.star, x=term, y=((estimate+1.96*std.error)+0.2)), 
            position = position_dodge(width = .5), vjust=.70, size = 6,
            show.legend = F) +
  geom_hline(yintercept=0, colour="grey60", linetype=2) + 
  scale_color_manual(values=c("black", "grey70")) +
  scale_shape_manual(values=c(16,15)) +
  scale_x_discrete(limits=rev(covs_order)) +
  theme_bw() +
  xlab("") + ylab("") +
  ggtitle("Association Between CWE Educational Achievement and LPG Ownership",
          subtitle = "Combined Kerala and Rajasthan (N = 10,006)") +
  coord_flip(ylim = c(-2.5,5)) + 
  theme(legend.position = c(0.85,0.90),
        legend.title = element_blank(),
        legend.text = element_text(size=16),
        plot.title = element_text(size=18),
        plot.subtitle = element_text(size=16),
        axis.text = element_text(size=15),
        strip.text.x = element_text(size=16),
        strip.text.y = element_text(size=14)) 


